library(GGally)
library(ggeffects)
library(gratia)
library(gt)
library(here)
library(mgcv)
library(patchwork)
library(sf)
library(sfdep)
library(tidyverse)
library(viridis)Supplement: Fremont Models
1 Overview
Goal: model the distribution of Fremont sites across watersheds.
Data: (i) site counts per watershed based on records from the State Historic Preservation Offices of Utah and Nevada and (ii) environmental data from the Oregon State University PRISM Group, as well as the USGS.
Method: a GAMM with a negative binomial distribution for count data with dispersion, an an offset to account for the area of each watershed, and an exponential covariance matrix to account for residual spatial autocorrelation.
2 R Preamble
3 Data
Unit of analysis: watersheds
Dependent variable:
- a count of sites per watershed
Independent variables:
- elevation
- maize growing degree days (gdd, in Celsius)
- precipitation (millimeters)
- cost-distance (in hours) to springs and streams
- protected status
- cost-distance (in hours) to roads
The climate variables, precipitation and gdd, were hindcasted for each watershed using Kyle Bocinsky’s paleocar package and averaged over the approximately 1,000 year occupational sequence of the Fremont in the project area. The cost-distance variables are derived from elevation data using Campbell’s hiking function and Djikstra’s search algorithm and averaged over each watershed. Protected status refers to the proportion of each watershed that is federal land. Note that it and cost-distance to roads are not explanatory variables, but rather controls on potential sources of sampling bias, mostly due to taphonomic processes operating on the archaeological record - basically, modern farms and cities.
Offsets:
- area of each watershed
This is included to account for sampling bias owing to the size of each watershed. The other two are there to account for sampling bias owing to variable survey intensity, as well as the potential for human impacts to cultural resources.
Note: not all of these variables make it into the final model as we first evaluate them for potential colinearity and concurvity. An additional concurvity test is performed after fitting the final model. Not all of them have smooths applied in the final model, either.
gpkg <- here("data", "western-fremont.gpkg")
utah <- read_sf(gpkg, "utah")
watersheds <- read_sf(gpkg, "watersheds") |>
mutate(
huc4 = str_sub(id, 1, 4),
huc4 = factor(huc4)
)Code
max_density_obs <- with(watersheds, max(sites/area))
obs <- ggplot() +
geom_sf(
data = utah,
fill = "gray95"
) +
geom_sf(
data = watersheds,
aes(fill = (sites/area)/max_density_obs),
color = "gray90",
linewidth = 0.2
) +
scale_fill_viridis(
name = "Relative\nSite Density",
option = "mako",
limits = c(0, 1)
) +
# coord_sf(expand = FALSE) +
theme_void() +
theme(
legend.background = element_rect(fill = "gray95", color = "transparent"),
legend.justification = c("left", "top"),
legend.position = c(0.66, 0.57)
)
obsThis map shows the relative density of sites based on their observed frequency in the study area. If site density is the ratio of the number of sites in a watershed to the area of that watershed, the relative density is the density estimate for each watershed divided by the maximum density estimate across all watersheds. It is more appropriate to visualize this value as it doesn’t have the same strong implications as visualizing the counts. (1) By visualizing the density, it does not make assumptions about the absence of sampling bias owing to the area of each watershed. (2) By visualizing the relative density, it does not make assumptions about the total occurrence rate of sites across the study area, i,e, it does not include an estimate of the intercept.
3.1 Test for colinearity
Code
diagonal_labels <- function(data, mapping, ...) {
GGally::ggally_text(
rlang::as_label(mapping$x),
col = "black",
size = 4
)
}
correlations <- function(data, mapping, color = "black", ...) {
# get the x and y data to use the other code
x <- GGally::eval_data_col(data, mapping$x)
y <- GGally::eval_data_col(data, mapping$y)
ct <- cor.test(x,y)
r <- unname(ct$estimate)
rt <- format(r, digits=2)[1]
tt <- as.character(rt)
tt <- stringr::str_c(
tt,
GGally::signif_stars(ct$p.value)
)
# plot the cor value
ggally_text(
label = tt,
mapping = aes(),
xP = 0.5,
yP = 0.5,
size = 4,
color = color,
...
)
}
# there's a bug in ggpairs() where it mislabels the y-axis, so we remove the
# text and ticks from this plot
watersheds |>
st_drop_geometry() |>
select(elevation, precipitation, gdd, streams, springs, protected, roads) |>
ggpairs(
diag = list(continuous = diagonal_labels),
lower = list(continuous = wrap("points", alpha = 0.3)),
upper = list(continuous = correlations)
) +
theme_bw(12) +
theme(
panel.grid = element_blank(),
strip.background = element_blank(),
strip.text = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank()
)As you can see, there are strong, linear correlations between all the variables, including the familiar inverse relationship between precipitation and gdd, which is driven by elevation in the American West. Based on this and a previous post-hoc concurvity test, we will drop springs.
3.2 Map of covariates
Code
make_map <- function(fill, name) {
ggplot() +
geom_sf(
data = utah,
fill = "gray95"
) +
geom_sf(
data = watersheds,
aes(fill = {{fill}}),
color = "gray90",
linewidth = 0.2
) +
scale_fill_viridis(
name = name,
option = "mako"
) +
theme_void() +
theme(
legend.background = element_rect(fill = "gray95", color = "transparent"),
legend.justification = c("left", "bottom"),
legend.position = c(0.65, 0.19)
)
}
for_print <-
(make_map(gdd, "GDD (°C)") + make_map(precipitation, "PPT (mm)")) /
(make_map(streams, "Streams\n(hours)") + make_map(protected, "Protected"))
bb8 <- st_union(watersheds, utah) |> st_bbox()
dy <- bb8[["ymax"]] - bb8[["ymin"]]
dx <- bb8[["xmax"]] - bb8[["xmin"]]
ggsave(
plot = for_print,
filename = here("figures", "covariates.png"),
width = 7,
height = 7 * (dy/dx),
dpi = 300
)
(make_map(gdd, "GDD (°C)") +
make_map(precipitation, "PPT (mm)") +
make_map(streams, "Streams\n(hours)")) /
(make_map(springs, "Springs\n(hours)") +
make_map(protected, "Protected") +
make_map(roads, "Roads\n(hours)"))3.3 Bocinsky 2014 Thresholds
Code
thresholds <- tibble(
xintercept = c(300, 1000),
x = c(320, 1020),
y = 72,
variable = c("precipitation", "gdd"),
label = c("Minimum threshold\n300 mm", "Minimum threshold\n1000°C GDD")
)
values <- watersheds |>
st_drop_geometry() |>
select(precipitation, gdd) |>
pivot_longer(
everything(),
names_to = "variable"
)
pretty_breaks <- function(x) {
xmin <- round(min(x), -2)
xmax <- round(max(x), -2)
seq(xmin, xmax, by = 200)
}
threshold_dists <- ggplot() +
geom_histogram(
data = values,
aes(value),
color = "white",
binwidth = 100,
center = 50
) +
geom_vline(
data = thresholds,
aes(xintercept = xintercept),
linewidth = 1.2,
color = "darkred"
) +
geom_text(
data = thresholds,
aes(x, y, label = label),
color = "darkred",
hjust = 0,
vjust = 1
) +
facet_wrap(
~variable,
scale = "free_x",
labeller = labeller(
variable = c(precipitation = "Precipitation (mm)",
gdd = "Maize GDD (°C)")
),
strip.position = "bottom"
) +
scale_x_continuous(breaks = pretty_breaks) +
labs(
x = NULL,
y = "Count of watersheds"
) +
theme_bw(12) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size = rel(1)),
strip.placement = "outside"
)
ggsave(
plot = threshold_dists,
filename = here("figures", "threshold_distributions.png"),
width = 7,
height = 7 * (dy/(2*dx)),
dpi = 300
)
threshold_distsHere’s what that threshold looks like in geographic space.
watersheds <- watersheds |>
mutate(
in_niche = ifelse(gdd >= 1000 & precipitation >= 300, "Yes", "No"),
in_niche = factor(in_niche)
)Code
threshold_map <- ggplot() +
geom_sf(
data = utah,
fill = "gray95"
) +
geom_sf(
data = watersheds,
aes(fill = in_niche),
color = "gray90",
linewidth = 0.2
) +
scale_fill_viridis(
name = "In Niche?",
option = "mako",
end = 0.75,
discrete = TRUE
) +
theme_void() +
theme(
legend.background = element_rect(fill = "gray95", color = "transparent"),
legend.justification = c("left", "top"),
legend.position = c(0.66, 0.57)
)
# ggsave(
# plot = (obs + threshold_map),
# filename = here("figures", "threshold_map.png"),
# width = 7,
# height = 7 * (dy/(2*dx)),
# dpi = 300
# )
obs + threshold_mapIs there a significant difference in elevation between those in the niche and those outside it?
with(watersheds, t.test(elevation ~ in_niche))
Welch Two Sample t-test
data: elevation by in_niche
t = -2.3667, df = 179.93, p-value = 0.01901
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-204.15616 -18.51095
sample estimates:
mean in group No mean in group Yes
1763.670 1875.003
4 Analysis
Here, we go through iterations of model fitting and evaluation, with changes to correct for problems like autocorrelation and concurvity (the smoothed version of co-linearity in the predictors). For each fitted model, we provide a model summary, diagnostic plots, and visualizations of partial effects (sometimes called marginal responses) for each smooth term.
fm <- sites ~
s(precipitation, k=5) +
s(gdd, k=5) +
s(streams, k=5) +
s(protected, k=5) +
offset(log(area))
fremont <- gam(
fm,
data = watersheds,
family = poisson,
select = TRUE
)summary(fremont)
Family: poisson
Link function: log
Formula:
sites ~ s(precipitation, k = 5) + s(gdd, k = 5) + s(streams,
k = 5) + s(protected, k = 5) + offset(log(area))
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.18440 0.03191 -131.1 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(precipitation) 2.918 4 620.9 <2e-16 ***
s(gdd) 3.948 4 335.7 <2e-16 ***
s(streams) 3.900 4 181.2 <2e-16 ***
s(protected) 2.024 4 271.9 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) = 0.248 Deviance explained = 36.8%
UBRE = 13.011 Scale est. = 1 n = 183
appraise(fremont)draw(fremont, residuals = TRUE)4.1 Dispersion
First, an h-test for scaled dispersion in a poisson model. In a poisson model, the variance is supposed to equal the mean. If this holds, then \(\alpha\) in \(Var(y) = E(y) + \alpha \cdot E(y)\) should equal zero. A simple, intercept-only linear model can test this idea.
# code adopted from AER::dispersiontest()
observed <- model.response(model.frame(fremont))
estimate <- fitted(fremont)
variance <- ((observed - estimate)^2 - observed)/estimate
dispersion_model <- lm(variance ~ 1)Code
tibble(
estimate = as.vector(coefficients(dispersion_model)[1]),
statistic = as.vector(summary(dispersion_model)$coef[1, 3]),
null = 0,
p.value = pnorm(statistic, lower.tail = FALSE)
) |>
mutate(across(everything(), round, digits = 4)) |>
gt(
rowname_col = NULL
) |>
tab_caption(
caption = gt::html("<p>t-test for Overdispersion<br>Alternative: α > 0</p>")
) |>
tab_options(
table.align = "left",
container.width = pct(35)
)| estimate | statistic | null | p.value |
|---|---|---|---|
| 16.1624 | 3.7909 | 0 | 1e-04 |
Clearly, there is over-dispersion in this model. To correct for this, we re-run the model with a negative binomial error distribution, letting the model adjust to \(\alpha\). We choose a negative binomial over a quasipoisson as it uses maximum likelihood.
fremont <- gam(
fm,
data = watersheds,
family = nb,
select = TRUE
)summary(fremont)
Family: Negative Binomial(0.7)
Link function: log
Formula:
sites ~ s(precipitation, k = 5) + s(gdd, k = 5) + s(streams,
k = 5) + s(protected, k = 5) + offset(log(area))
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.02417 0.09364 -42.98 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(precipitation) 3.1185 4 51.682 < 2e-16 ***
s(gdd) 0.9617 4 24.523 1.54e-06 ***
s(streams) 0.8993 4 8.814 0.001473 **
s(protected) 1.1569 4 12.757 0.000205 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) = 0.095 Deviance explained = 26.8%
-REML = 616.95 Scale est. = 1 n = 183
appraise(fremont)draw(fremont, residuals = TRUE)4.2 Residual Spatial Autocorrelation
Monte Carlo simulations of Moran’s I to test for spatial autocorrelation in the residuals (using sfdep).
neighbors <- st_contiguity(watersheds)
weights <- st_weights(neighbors)
global_moran_perm(
residuals(fremont),
neighbors,
weights
)
Monte-Carlo simulation of Moran I
data: x
weights: listw
number of simulations + 1: 500
statistic = 0.24234, observed rank = 500, p-value < 2.2e-16
alternative hypothesis: two.sided
There’s significant spatial autocorrelation. To fix this, we will use a generalized additive mixed-model (GAMM) with an exponential spatial correlation structure. This is not strictly correct, as it assumes spatial continuity of points, not polygons, but it’s a useful first approximation.
4.3 Spatial Correlation Structure
Add coordinates for watershed centroids to the dataset.
xy <- watersheds |>
st_centroid() |>
st_coordinates() |>
as_tibble() |>
rename_with(tolower)
watersheds <- bind_cols(watersheds, xy)fremont <- gamm(
fm,
correlation = corExp(form = ~x+y),
data = watersheds,
family = nb,
verbosePQL = FALSE
)summary(fremont$lme)Linear mixed-effects model fit by maximum likelihood
Data: data
AIC BIC logLik
644.2338 679.5382 -311.1169
Random effects:
Formula: ~Xr - 1 | g
Structure: pdIdnot
Xr1 Xr2 Xr3
StdDev: 6.702497 6.702497 6.702497
Formula: ~Xr.0 - 1 | g.0 %in% g
Structure: pdIdnot
Xr.01 Xr.02 Xr.03
StdDev: 0.0002630697 0.0002630697 0.0002630697
Formula: ~Xr.1 - 1 | g.1 %in% g.0 %in% g
Structure: pdIdnot
Xr.11 Xr.12 Xr.13
StdDev: 4.984765e-05 4.984765e-05 4.984765e-05
Formula: ~Xr.2 - 1 | g.2 %in% g.1 %in% g.0 %in% g
Structure: pdIdnot
Xr.21 Xr.22 Xr.23 Residual
StdDev: 6.639019e-05 6.639019e-05 6.639019e-05 1.216056
Correlation Structure: Exponential spatial correlation
Formula: ~x + y | g/g.0/g.1/g.2
Parameter estimate(s):
range
9140.882
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: list(fixed)
Value Std.Error DF t-value p-value
X(Intercept) -4.034227 0.1192318 178 -33.83517 0.0000
Xs(precipitation)Fx1 0.705309 0.4961590 178 1.42154 0.1569
Xs(gdd)Fx1 0.874521 0.2111328 178 4.14204 0.0001
Xs(streams)Fx1 -0.404601 0.1579876 178 -2.56097 0.0113
Xs(protected)Fx1 0.423848 0.1341687 178 3.15907 0.0019
Correlation:
X(Int) Xs(prc)F1 Xs(g)F1 Xs(s)F1
Xs(precipitation)Fx1 0.037
Xs(gdd)Fx1 -0.023 0.243
Xs(streams)Fx1 0.053 0.193 -0.291
Xs(protected)Fx1 -0.053 0.104 0.381 -0.189
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-0.8019785 -0.6668399 -0.3725204 0.3431171 5.2097590
Number of Observations: 183
Number of Groups:
g g.0 %in% g
1 1
g.1 %in% g.0 %in% g g.2 %in% g.1 %in% g.0 %in% g
1 1
summary(fremont$gam)
Family: negative binomial
Link function: log
Formula:
sites ~ s(precipitation, k = 5) + s(gdd, k = 5) + s(streams,
k = 5) + s(protected, k = 5) + offset(log(area))
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.0342 0.1179 -34.21 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(precipitation) 3.255 3.255 11.249 8.84e-07 ***
s(gdd) 1.000 1.000 17.542 4.46e-05 ***
s(streams) 1.000 1.000 6.706 0.01041 *
s(protected) 1.000 1.000 10.204 0.00166 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) = 0.0979
Scale est. = 1.4788 n = 183
appraise(fremont$gam)draw(fremont$gam)By accounting for spatial autocorrelation, the expected degrees of freedom for gdd and cost-distance to streams are now both 1, which suggests that there are no non-linear effects for those covariates.
4.4 Concurvity
A post-hoc test of non-linear correlations in the predictors.
concurvity(fremont$gam) para s(precipitation) s(gdd) s(streams) s(protected)
worst 1.163589e-23 0.7728363 0.7657603 0.7297433 0.4042881
observed 1.163589e-23 0.6578260 0.7496968 0.5998111 0.3815676
estimate 1.163589e-23 0.6023262 0.6815905 0.4765007 0.3365670
Not good…
4.5 Final model
Cost-distance to roads is non-significant, so we’ll remove that variable from the model entirely.
fm <- sites ~
s(precipitation, k=5) +
gdd +
streams +
protected +
offset(log(area))
fremont <- gamm(
fm,
correlation = corExp(form = ~x+y),
data = watersheds,
family = nb,
verbosePQL = FALSE
)summary(fremont$lme)Linear mixed-effects model fit by maximum likelihood
Data: data
AIC BIC logLik
638.0653 663.7412 -311.0327
Random effects:
Formula: ~Xr - 1 | g
Structure: pdIdnot
Xr1 Xr2 Xr3 Residual
StdDev: 6.697139 6.697139 6.697139 1.215706
Correlation Structure: Exponential spatial correlation
Formula: ~x + y | g
Parameter estimate(s):
range
9157.976
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: list(fixed)
Value Std.Error DF t-value p-value
X(Intercept) -9.967830 1.4015255 178 -7.112129 0.0000
Xgdd 0.004111 0.0009937 178 4.137691 0.0001
Xstreams -0.400974 0.1567197 178 -2.558545 0.0113
Xprotected 1.571773 0.4982798 178 3.154399 0.0019
Xs(precipitation)Fx1 0.703765 0.4958984 178 1.419173 0.1576
Correlation:
X(Int) Xgdd Xstrms Xprtct
Xgdd -0.969
Xstreams 0.211 -0.291
Xprotected -0.566 0.381 -0.189
Xs(precipitation)Fx1 -0.261 0.243 0.193 0.104
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-0.8020315 -0.6669655 -0.3725634 0.3425410 5.2073073
Number of Observations: 183
Number of Groups: 1
summary(fremont$gam)
Family: negative binomial
Link function: log
Formula:
sites ~ s(precipitation, k = 5) + gdd + streams + protected +
offset(log(area))
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -9.9678297 1.3976051 -7.132 2.48e-11 ***
gdd 0.0041114 0.0009909 4.149 5.19e-05 ***
streams -0.4009744 0.1562813 -2.566 0.01113 *
protected 1.5717732 0.4968861 3.163 0.00184 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(precipitation) 3.254 3.254 11.05 1.07e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) = 0.0982
Scale est. = 1.4779 n = 183
appraise(fremont$gam)draw(parametric_effects(fremont$gam))draw(fremont$gam)4.6 Final Moran’s I
One more Moran’s I test to see if we really got a hold of the spatial autocorrelation. We’ll do this with the normalized residuals of the lme inside the GAMM as that incorporates the exponential covariance matrix.
global_moran_perm(
residuals(fremont$lme, type = "normalized"),
neighbors,
weights
)
Monte-Carlo simulation of Moran I
data: x
weights: listw
number of simulations + 1: 500
statistic = 0.037533, observed rank = 407, p-value = 0.372
alternative hypothesis: two.sided
Looks good!
4.7 Variance Inflation Factor
There can’t be concurvity in the model anymore, as there is only one smoothed term. However, we should now check for potential multi-collinearity in the parameteric terms.
# borrowing this code from mgcv.helper
# https://github.com/samclifford/mgcv.helper/blob/master/R/vif.gam.R
vif <- function(object){
obj.sum <- mgcv::summary.gam(object)
# estimate of standard deviation of residuals
s2 <- object$sig2
# data used to fit the model
X <- object$model
# n observations
n <- nrow(X)
# omit the intercept term, it can't inflate variance
v <- -1
# variance in estimates
varbeta <- obj.sum$p.table[v,2]^2
selected_col <- row.names(obj.sum$p.table)[v]
selected_col <- gsub("TRUE", "", selected_col)
# variance of all the explanatory variables
varXj <- apply(X=X[, selected_col],MARGIN=2, var)
# the variance inflation factor, obtained by rearranging
# var(beta_j) = s^2/(n-1) * 1/var(X_j) * VIF_j
VIF <- varbeta/(s2/(n-1)*1/varXj)
tibble::tibble(
variable = names(VIF),
vif = VIF
)
}
vif(fremont$gam)# A tibble: 3 × 2
variable vif
<chr> <dbl>
1 gdd 5.49
2 streams 3.07
3 protected 2.22
The VIF for gdd is a little high, but within acceptable limits.
5 Results
5.1 Marginal response
Here we estimate the marginal responses (also known as partial effects) for each covariate by letting the target covariate vary and holding the other covariates at their means and predicting the site count (the response). Here we use the {ggeffects} package.
Code
margins <- ggpredict(fremont) |>
unclass() |>
bind_rows() |>
filter(group != "area") |>
rename(
"y" = predicted,
"ymin" = conf.low,
"ymax" = conf.high,
"variable" = group
) |>
mutate(
# hacky way to "clip" the ribbon before expanding in ggplot
ymax = ifelse(ymax > 100, 100, ymax)
)
response_labels <- tibble(
variable = c("precipitation", "gdd", "streams", "protected"),
label = c("Precipitation\n(mm)",
"Maize GDD\n(°C)",
"Streams\n(hours)",
"Protected\n(Federal Land)"),
x = with(watersheds, c(max(precipitation), min(gdd), max(streams), min(protected))),
y = max(margins$ymax),
hjust = c(0.95,0.05,0.95,0.05)
)
scale_pretty <- function(x, n = 4L) {
brks <- pretty(x, n = n)
nbrks <- length(brks)
i <- ifelse(
nbrks > n,
c(1, nbrks),
nbrks
)
brks[-i]
}
marginal_response <- ggplot(margins, aes(x, y)) +
geom_ribbon(
aes(ymin = ymin, ymax = ymax, fill = variable)
) +
scale_fill_manual(
values = c("#4F3824", "#D1603D", "#DDB967", "#848FA5")
) +
geom_line(
linewidth = 2,
color = alpha('white', 0.5),
lineend = "round"
) +
geom_line(
linewidth = 1,
lineend = "round"
) +
geom_label(
data = response_labels,
aes(x, y, label = label),
size = 4,
hjust = response_labels$hjust,
vjust = 1,
label.size = NA,
alpha = 0.5,
lineheight = 0.88
) +
labs(
x = NULL,
y = "Number of sites"
) +
facet_wrap(
~variable,
scale = "free_x"
) +
scale_x_continuous(breaks = scale_pretty) +
coord_cartesian(ylim = c(0, 100)) +
theme_bw(12) +
theme(
axis.title.y = element_text(size = 13),
legend.position = "none",
panel.grid = element_blank(),
strip.background = element_blank(),
strip.text = element_blank()
)
ggsave(
plot = marginal_response,
filename = here("figures", "marginal_response.png"),
width = 5,
height = 5 * 0.93,
dpi = 300
)
marginal_response5.2 Map
watersheds <- watersheds |>
mutate(
estimate = fitted(fremont$gam) |> as.vector(),
residuals = residuals(fremont$lme, type = "normalized")
)Code
basemap <- ggplot() +
geom_sf(
data = utah,
fill = "gray95"
) +
theme_void() +
theme(
legend.background = element_rect(fill = "gray95",
color = "transparent"),
legend.justification = c("left", "bottom"),
legend.position = c(0.65, 0.19)
)
obs <- basemap +
geom_sf(
data = watersheds,
aes(fill = (sites/area)/max_density_obs),
color = "gray90",
linewidth = 0.1
) +
scale_fill_viridis(
name = "Observed",
option = "mako",
limits = c(0, 1)
)
max_density_est <- with(watersheds, max(estimate/area))
estimate <- basemap +
geom_sf(
data = watersheds,
aes(fill = (estimate/area)/max_density_est),
color = "gray90",
linewidth = 0.1
) +
scale_fill_viridis(
name = "Estimated",
option = "mako",
limits = c(0, 1)
)
res <- basemap +
geom_sf(
data = watersheds,
aes(fill = residuals),
color = "gray90",
linewidth = 0.1
) +
scale_fill_viridis(
name = "Residuals",
option = "mako"
)
ggsave(
plot = (obs + estimate),
filename = here("figures", "model-map.png"),
width = 7,
height = 7 * (dy/(2*dx)),
dpi = 300
)
ggsave(
plot = (estimate + threshold_map),
filename = here("figures", "threshold_map.png"),
width = 7,
height = 7 * (dy/(2*dx)),
dpi = 300
)
(obs + estimate) / (res + threshold_map)Please note that the residuals are still being shown for the count-level data.
6 Session Info
Code
# save the session info as an object
pkg_sesh <- sessioninfo::session_info(pkgs = "attached")
# inject the quarto info
pkg_sesh$platform$quarto <- paste(
quarto::quarto_version(),
"@",
quarto::quarto_path()
)
# print it out
pkg_sesh─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.2.1 (2022-06-23 ucrt)
os Windows 10 x64 (build 22621)
system x86_64, mingw32
ui RTerm
language (EN)
collate English_United States.utf8
ctype English_United States.utf8
tz America/Denver
date 2023-04-30
pandoc 2.19.2 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
quarto 1.2.329 @ C:\\PROGRA~1\\Quarto\\bin\\quarto.exe
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.2.3)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.2)
GGally * 2.1.2 2021-06-21 [1] CRAN (R 4.2.2)
ggeffects * 1.2.1 2023-04-02 [1] CRAN (R 4.2.3)
ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.2.3)
gratia * 0.8.1 2023-02-02 [1] CRAN (R 4.2.2)
gt * 0.9.0 2023-03-31 [1] CRAN (R 4.2.3)
here * 1.0.1 2020-12-13 [1] CRAN (R 4.2.1)
lubridate * 1.9.2 2023-02-10 [1] CRAN (R 4.2.2)
mgcv * 1.8-42 2023-03-02 [1] CRAN (R 4.2.2)
nlme * 3.1-157 2022-03-25 [2] CRAN (R 4.2.1)
patchwork * 1.1.2 2022-08-19 [1] CRAN (R 4.2.1)
purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
readr * 2.1.4 2023-02-10 [1] CRAN (R 4.2.2)
sf * 1.0-12 2023-03-19 [1] CRAN (R 4.2.3)
sfdep * 0.2.3 2023-01-11 [1] CRAN (R 4.2.2)
stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.2.1)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.3)
tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.2)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.2.2)
viridis * 0.6.2 2021-10-13 [1] CRAN (R 4.2.2)
viridisLite * 0.4.1 2022-08-22 [1] CRAN (R 4.2.2)
[1] C:/Users/kenne/AppData/Local/R/win-library/4.2
[2] C:/Program Files/R/R-4.2.1/library
──────────────────────────────────────────────────────────────────────────────